# coding=utf8
import os
import time
from io import BytesIO

import requests
import tldextract
from PIL import Image
from bs4 import BeautifulSoup

from models.lazada_item import model_lazada_item


def search(url):
    """Crawl a Lazada listing page by page and write an HTML report of its items.

    For every item produced by ``get_items`` the product image is downloaded
    to ``data/img/<search_target>/<n>.jpg`` (best effort) and one table row is
    added to the report, which is finally written to
    ``data/<search_target>_lazada_shop_items.html``.

    :param url: listing URL. It must already contain a query string, because
        ``&page=<n>`` is appended to it, and its second-to-last ``/``-separated
        segment is used as the name of the output folder and report file.
    """
    from html import escape  # local import keeps this block self-contained

    html_tpl = "<html><head></head><body><table>" \
               "<tr>" \
               "<th>idx</th><th>产品名</th><th>图</th><th>功能</th><th>价格</th><th>包装</th><th>链接</th>" \
               "</tr>" \
               "{trs}" \
               "</table></body></html>"
    tr_tpl = "<tr><td>{idx}</td><td>{title}</td><td>" \
             "<img src='{img}' style='width: 280px;'></td>" \
             "<td>{content}</td><td>{price}</td><td>{packet}</td><td>{href}</td></tr>"
    trs = []
    n = 0
    search_target = url.split('/')[-2]
    base_img_path = 'img/' + search_target + '/'
    # makedirs also creates the missing parents ("data", "data/img") and does
    # not fail when the folder is already there.
    os.makedirs('data/' + base_img_path, exist_ok=True)

    page = 0
    next_page = True
    while next_page:
        page += 1
        print(page)
        page_url = url + '&page=' + str(page)
        for item in get_items(page_url):
            # get_items yields a falsy sentinel once there is no further page.
            if not item:
                next_page = False
                break

            # Falls back to the remote image URL when the local copy could
            # not be written, so the report never references a missing file.
            img_path = _save_item_image(item['img'], base_img_path + str(n) + '.jpg')

            # Text fields are scraped (untrusted) and must be escaped;
            # "content" is deliberately left raw because it is an HTML fragment.
            trs.append(tr_tpl.format(
                idx=n,
                title=escape(str(item['title'])),
                img=escape(str(img_path)),
                price=escape(str(item['price'])),
                packet=escape(str(item['packet'])),
                content=item['content'],
                href=escape(str(item['url'])),
            ))
            n += 1
            time.sleep(1)  # throttle between items
        time.sleep(2)  # throttle between listing pages
    with open('data/' + search_target + '_lazada_shop_items.html', 'w', encoding='utf8') as f:
        f.write(html_tpl.format(trs="".join(trs)))


def _save_item_image(img_url, rel_path, timeout=30):
    """Download ``img_url`` to ``data/<rel_path>`` as a JPEG (best effort).

    :param img_url: remote image URL.
    :param rel_path: target path relative to the ``data/`` folder.
    :param timeout: seconds to wait for the HTTP request before giving up.
    :return: ``rel_path`` when the file was written, otherwise ``img_url``.
    """
    try:
        resp = requests.get(img_url, timeout=timeout)
        resp.raise_for_status()
        image = Image.open(BytesIO(resp.content))
        # The JPEG writer rejects modes such as RGBA or P; convert those first.
        if image.mode not in ('RGB', 'L', 'CMYK'):
            image = image.convert('RGB')
        image.save('data/' + rel_path)
    except Exception as exc:  # best effort: one bad image must not stop the crawl
        print("图片保存失败", exc)
        return img_url
    return rel_path


# HTTP request headers passed to every requests.get() call in get_items
# (both the listing page and each product page).
# NOTE(review): the Cookie value looks like it was copied from a browser
# session (it includes a PHPSESSID entry and 2016-era timestamps) — confirm
# it is still needed and safe to keep in source control.
header = {
    'Connection': 'keep-alive',
    'Cookie': 'anon_uid=ab69eeb5-87c7-4f7a-456f-f1785029db37; ga_exp_leila=Boosting_June14||0|||ID-Voucher_Banner_Relevance||0; browserDetection=eyJ0eXBlIjoiYnJvd3NlciIsIm5hbWUiOiJDaHJvbWUiLCJjc3NDbGFzcyI6ImNocm9tZSIsInZlcnNpb24iOiI1MyIsIm9zIjoibWFjIn0%3D; _lzdTracker=t8nc1ukqe769ugwfrrrizfr; AMCV_126E248D54200F960A4C98C6%40AdobeOrg=-1506950487%7CMCMID%7C07518433045843938801464388409864510584%7CMCAAMLH-1469794752%7C9%7CMCAAMB-1469794752%7CNRX38WO0n5BH8Th-nqAG_A%7CMCAID%7CNONE; s_vnum=1500725953123%26vn%3D1; _tsm=m%3DDirect%2520%252F%2520Brand%2520Aware%253A%2520Typed%2520%252F%2520Bookmarked%2520%252F%2520etc%7Cs%3D%28none%29; is_new_user=0; __utmt=1; _dc_gtm_UA-29801013-7=1; PHPSESSID_f4467fd86a1d258ab0ca45f8ffed213a=3121ebb3bb381768e0b822362841f7f4; _ga=GA1.3.1880703260.1469189901; userLanguageML=id; fd_location_entered=Not+filled+in; lzd_first_visit=1; lzd_visitor_type=new; lzd_first_purchase=Unknown; __utmx=102046108.nDOEbAWDRBaBQK_26-h8Vg$0:0.2_TkNJ1QQSyQYzNG7dzlrQ$0:-1; __utmxx=102046108.nDOEbAWDRBaBQK_26-h8Vg$0:1469191189:8035200:.2_TkNJ1QQSyQYzNG7dzlrQ$0:1469191189:8035200; gpv_pn=subcategory%3AKesehatan%20Seksual; s_invisit=true; undefined_s=First%20Visit; s_sq=%5B%5BB%5D%5D; s_cc=true; _vwo_uuid_v2=D3F9BBE14A1CC33DF538DC6864D32257|1fba072d8a1af00bc1a0e568d9247140; __utma=102046108.1880703260.1469189901.1469189901.1469189901.1; __utmb=102046108.21.9.1469191194335; __utmc=102046108; __utmz=102046108.1469189901.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); session_counter=1; rr_rcs=eF4Nx6sVgDAQBECDopflsffhLh3QRkgiEDigfhg303y999mX0ALaVlgozrB_DnB62t5zHKlSQdWArRlIN4OKe7Q-hDU_XMEQ1w; _ceg.s=oapv84; _ceg.u=oapv84; daily_visit_count=8|1469276313384; s_ppvl=D%253Dch%2B%2522%253Aobat-kuat-sex-pria-perkasa-tahan-lama-cialis-50-mg-2994668%2522%2C48%2C48%2C1935%2C1280%2C592%2C1280%2C800%2C1%2CP; s_ppv=D%253Dch%2B%2522%253AKesehatan%2520Seksual%2522%2C8%2C8%2C636%2C817%2C636%2C1280%2C800%2C1%2CP',
    # Host is left disabled; it would pin the headers to a single domain.
    # 'Host': 'www.lazada.co.id',
    'Pragma': 'no-cache',
    # Referer is likewise disabled and therefore not sent.
    # 'Referer': 'http://www.lazada.co.id/beli-kesehatan-seksual/?itemperpage=120',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.8 Safari/537.36',
}


def get_items(page_url):
    """Yield the product items found on one listing page.

    Each product link on the page is looked up in ``model_lazada_item`` by
    ``pid`` and domain ``suffix``; when no record exists the product page is
    fetched, parsed and stored via ``insert_one`` before being yielded.
    Products whose page cannot be fetched or parsed are reported and skipped.

    :param page_url: URL of the listing page to scrape.
    :return: generator of item mappings with the keys ``pid``, ``title``,
        ``price``, ``img``, ``packet``, ``content``, ``url`` and ``suffix``
        (stored records are presumably shaped the same — verify against the
        model), followed by a final ``False`` when the page has no
        ``.next_link`` element, i.e. it is the last page.
    :raises requests.RequestException: if the listing page itself cannot be
        fetched (including a timeout).
    """
    request_timeout = 30  # seconds; without it requests may block forever

    listing = requests.get(page_url, headers=header, timeout=request_timeout)
    soup = BeautifulSoup(listing.text, 'lxml')
    a_taps = soup.select('div.component.component-product_list.product_list.grid.toclear > div > a')

    # Use the named attribute: positional indexing of the extract result is
    # not supported by newer tldextract releases.
    suffix = tldextract.extract(page_url).suffix

    for a in a_taps:
        href = a.get('href')
        if not href:
            # An anchor without a target cannot be scraped; skip it.
            continue
        print(href)
        # The product id is the last "-"-separated token of the page name.
        pid = href.split('-')[-1].replace('.html', '')
        item = model_lazada_item.find_one({'pid': pid, 'suffix': suffix})
        if item is None:
            try:
                detail = requests.get(href, headers=header, timeout=request_timeout)
                item_soup = BeautifulSoup(detail.text, 'lxml')
                # A missing element makes select_one return None, so the
                # attribute/index access raises and the item is skipped below.
                item = {
                    'pid': pid,
                    'title': item_soup.select_one('#prod_title').get_text().replace(',', '，').replace("\n", ' '),
                    'price': item_soup.select_one('#special_price_box').get_text().replace(',', ''),
                    'img': item_soup.select_one('div.productImage')['data-big'],
                    'packet': item_soup.select_one('.inbox__item').get_text(),
                    'content': str(item_soup.select_one('.prod_content')),
                    'url': href,
                    'suffix': suffix,
                }
                model_lazada_item.insert_one(item)
            except Exception as exc:  # best effort: skip this product, keep crawling
                print("获取物品出问题", exc)
                time.sleep(5)  # back off before the next request
                continue
        yield item
    # Signal the caller that there is no further page.
    if soup.select_one('.next_link') is None:
        yield False


if __name__ == '__main__':
    # Script entry point: crawl a single hard-coded listing.
    # Alternative listing URL, currently disabled:
    # search('http://www.lazada.co.id/beli-kesehatan-seksual/?itemperpage=120')
    search(url='http://www.lazada.com.my/sexual-health/?itemperpage=120')
